#!/bin/bash
#
# Runs the BLAST cluster job, gathers its result files into the dated data
# directory, converts them into a single gzipped table, and then writes a
# filtered, sorted copy of that table.
#
# The helper scripts are invoked through relative paths, so this script must
# be started from the directory that contains 2_raw_matrix_blast/.
#
# Outputs (under ${CUR_DATA_DIR}):
#   output/                     - files moved out of ${RESULTS_DIR}
#   results.txt.gz              - converted table
#   results_0.05_sorted.txt.gz  - rows with column 3 < 0.05, sorted by column 1

# Stop at the first failing command, unset variable, or failing pipeline
# stage, so a broken BLAST run is never followed by moving and converting
# partial output.
set -euo pipefail

# These are exported, so they are visible to the helper scripts started
# below (whether the helpers read them is not visible from this file).
export BASE_DATA_PATH=/home/mixagol/data
export BASE_DATE=20120802
export CUR_DATA_DIR=${BASE_DATA_PATH}/2_raw_matrix_find/${BASE_DATE}/blast
# Byte-wise collation: makes the sort below (and glob ordering)
# locale-independent and reproducible.
export LC_ALL=C

export RESULTS_DIR=/home/mixagol/work/data/results_search_genes_in_genomes_04/
mkdir -p "${RESULTS_DIR}/output"

2_raw_matrix_blast/run_blast_cluster.sh "${RESULTS_DIR}/output/"

mkdir -p "${CUR_DATA_DIR}/output/"

# Move every regular file found anywhere under RESULTS_DIR into one flat
# directory. NUL-delimited names keep whitespace, quotes and backslashes in
# file names intact.
find "${RESULTS_DIR}/" -type f -print0 \
    | xargs -0 -I {} mv {} "${CUR_DATA_DIR}/output/"

# Expand the glob in the shell instead of parsing `ls`; the expansion is
# sorted, so files are still concatenated in name order.
blast_outputs=("${CUR_DATA_DIR}/output"/*)
if [[ ! -e "${blast_outputs[0]}" ]]; then
    printf 'no BLAST output files found in %s\n' "${CUR_DATA_DIR}/output" >&2
    exit 1
fi

# printf is a builtin, so the file list is not limited by the exec argument
# size; xargs batches it for zcat.
printf '%s\0' "${blast_outputs[@]}" \
    | xargs -0 zcat \
    | 2_raw_matrix_blast/convert_output.py \
    | gzip > "${CUR_DATA_DIR}/results.txt.gz"

# Keep rows whose third tab-separated column is below 0.05 (presumably the
# e-value - confirm against convert_output.py), then sort by the first
# column. sort gets a 2G memory buffer and spills temporary files into
# CUR_DATA_DIR rather than the default temp directory.
zcat "${CUR_DATA_DIR}/results.txt.gz" \
    | awk -F'\t' '$3<0.05' \
    | sort -t$'\t' -S2G -T "${CUR_DATA_DIR}" -k1,1 \
    | gzip > "${CUR_DATA_DIR}/results_0.05_sorted.txt.gz"

